import requests
from bs4 import BeautifulSoup
import os
import time
import json
import csv
import re

# os.chdir(r"C://Users//dell//Desktop//dlp_hw2")

class Ieee2():
    """Search IEEE Xplore for a keyword and download the matching PDFs.

    The call chain is ``main`` -> ``get_page`` -> ``get_id`` -> ``down_pdf``;
    each step stores its results on the instance and then triggers the next.

    Attributes set along the way:
        response: raw JSON text returned by the search endpoint.
        n: number of records found in that response.
        ids, titles, authors: parallel lists extracted from the records.
    """

    # Directory that receives the downloaded PDFs and the CSV index.
    out_dir = './ieee_Quan'
    # Name of the CSV index file inside ``out_dir``.
    csv_name = 'ieee.csv'
    # Characters replaced by "_" when a title is turned into a file name.
    _UNSAFE_CHARS = re.compile(r'[\\/:*?"<>|-]')

    def __init__(self,url='https://ieeexplore.ieee.org/rest/search',key='bert'):
        """Store the search endpoint and keyword and build the request payload.

        Args:
            url: search endpoint that receives the POST request.
            key: query text to search for.
        """
        self.url = url
        self.key = key
        # JSON payload sent to the search endpoint.
        self.data = {
            'highlight': 'true',
            'matchPubs': 'true',
            'newsearch': 'true',
            'queryText': self.key,
            'returnFacets': ["ALL"],
            'returnType': "SEARCH"
        }
        # NOTE(review): 'br' is advertised in Accept-Encoding; presumably the
        # response is only decodable if a brotli decoder is installed - confirm.
        self.headers = {
            'Accept': 'application/json,text/plain,*/*',
            'Accept-Encoding': 'gzip,deflate,br',
            'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
            'Connection': 'keep-alive',
            'Content-Type': 'application/json',
            'Referer': 'https://ieeexplore.ieee.org/search/searchresult.jsp?newsearch=true&queryText=' + self.key,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

    @staticmethod
    def _safe_filename(title):
        """Return *title* with characters that are unsafe in file names replaced by ``_``."""
        return Ieee2._UNSAFE_CHARS.sub('_', title)

    def get_page(self):
        """POST the search query, record the response, then extract the records.

        Raises:
            requests.HTTPError: if the search endpoint answers with an error status.
        """
        # The request below skips certificate verification; silence the
        # resulting warning before it is emitted rather than afterwards.
        requests.packages.urllib3.disable_warnings()
        response = requests.post(url=self.url, data=json.dumps(self.data), headers=self.headers, verify=False)
        response.raise_for_status()  # make sure the request succeeded
        self.response = response.text
        time.sleep(2)
        # A response without a 'records' key is treated as zero results.
        self.n = len(json.loads(self.response).get('records', []))
        print('搜索到' + str(self.n) + '篇文章')
        self.get_id()

    def get_id(self):
        """Extract article numbers, titles and authors, then start downloading.

        Reads ``self.response`` and ``self.n``; fills ``self.ids``,
        ``self.titles`` and ``self.authors`` as parallel lists.
        """
        # Parse the response once instead of once per field per record.
        records = json.loads(self.response).get('records', [])[:self.n]
        self.ids = [record['articleNumber'] for record in records]
        self.titles = [record['articleTitle'] for record in records]
        # Records without an 'authors' entry get an empty author list.
        self.authors = [record.get('authors', []) for record in records]
        self.down_pdf()

    def down_pdf(self, times=10):
        """Download the PDF of each collected article and index it in the CSV.

        Args:
            times: maximum number of articles to download (non-negative);
                ``None`` removes the limit.
        """
        batch = list(zip(self.ids, self.titles, self.authors))[:times]
        if not batch:
            # Nothing to download: touch neither the network nor the disk.
            return
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        os.makedirs(self.out_dir, exist_ok=True)
        csv_path = os.path.join(self.out_dir, self.csv_name)
        with open(csv_path, 'a', newline='', encoding='utf-8') as csv_fp:
            writer = csv.writer(csv_fp)
            for article_id, title, author in batch:
                new_url = 'https://ieeexplore.ieee.org/stampPDF/getPDF.jsp?tp=&arnumber={}&ref='.format(article_id)
                print(new_url)
                content = requests.get(new_url, headers=headers).content
                # Sanitize the title so that characters such as "/" or ":"
                # cannot break the output path.
                pdf_name = self._safe_filename(title) + '.pdf'
                with open(os.path.join(self.out_dir, pdf_name), 'wb') as fp:
                    fp.write(content)
                print(pdf_name, '打印成功')
                # The CSV keeps the original, unsanitized title.
                writer.writerow([title, author])

    def main(self):
        """Prepare the output directory and CSV index, then run the search."""
        os.makedirs(self.out_dir, exist_ok=True)
        csv_path = os.path.join(self.out_dir, self.csv_name)
        # Write the header only for a new or empty file, so repeated runs do
        # not scatter duplicate header rows through the index.
        if not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0:
            with open(csv_path, 'a', newline='', encoding='utf-8') as fp:
                csv.writer(fp).writerow(['title', 'author'])
        self.get_page()

if __name__ == '__main__':
    # Script entry point: search with the default keyword and download the results.
    Ieee2().main()
